The goal of this script is to generate a Seurat object for sample F31B.
LogNormalize, then doublets
detection using scran hybrid and scDblFinder
method, and doublet cells removalLogNormalize, for only the remaining
cellsPCAtSNE and UMAPlibrary(dplyr)
library(patchwork)
library(ggplot2)
.libPaths()
## [1] "/usr/local/lib/R/library"
In this section, we set the global settings of the analysis. We will store data there :
out_dir = "."
We load the parameters :
sample_name = params$sample_name # "F31W"
# sample_name = "F31W"
Input count matrix is there :
count_matrix_dir = paste0(out_dir, "/input/", sample_name, "/raw_feature_bc_matrix")
We load the markers and specific colors for each cell type :
cell_markers = readRDS(paste0(out_dir, "/../../1_metadata/hs_hd_cell_markers.rds"))
cell_markers = lapply(cell_markers, FUN = toupper)
lengths(cell_markers)
## CD4 T cells CD8 T cells Langerhans cells
## 13 13 9
## macrophages B cells cuticle
## 10 16 15
## cortex medulla IRS
## 16 10 16
## proliferative ORS IFE basal
## 20 15 16
## IFE granular spinous HF-SCs melanocytes
## 17 17 10
## sebocytes
## 8
Here are custom colors for each cell type :
color_markers = readRDS(paste0(out_dir, "/../../1_metadata/hs_hd_color_markers.rds"))
data.frame(cell_type = names(color_markers),
color = unlist(color_markers)) %>%
ggplot2::ggplot(., aes(x = cell_type, y = 0, fill = cell_type)) +
ggplot2::geom_point(pch = 21, size = 5) +
ggplot2::scale_fill_manual(values = unlist(color_markers), breaks = names(color_markers)) +
ggplot2::theme_classic() +
ggplot2::theme(legend.position = "none",
axis.line = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank(),
axis.text.y = element_blank())
We load markers to display on the dotplot :
dotplot_markers = readRDS(paste0(out_dir, "/../../1_metadata/hs_hd_dotplot_markers.rds"))
dotplot_markers = lapply(dotplot_markers, FUN = toupper)
dotplot_markers
## $`CD4 T cells`
## [1] "CD3E" "CD4"
##
## $`CD8 T cells`
## [1] "CD3E" "CD8A"
##
## $`Langerhans cells`
## [1] "CD207" "CPVL"
##
## $macrophages
## [1] "TREM2" "MSR1"
##
## $`B cells`
## [1] "CD79A" "CD79B"
##
## $cuticle
## [1] "KRT32" "KRT35"
##
## $cortex
## [1] "KRT31" "PRR9"
##
## $medulla
## [1] "BAMBI" "ADLH1A3"
##
## $IRS
## [1] "KRT71" "KRT73"
##
## $proliferative
## [1] "TOP2A" "MCM5"
##
## $ORS
## [1] "KRT16" "KRT6C"
##
## $`IFE basal`
## [1] "KRT15" "SAA1"
##
## $`IFE granular spinous`
## [1] "SPINK5" "KRT1"
##
## $`HF-SCs`
## [1] "DIO2" "TCEAL2"
##
## $melanocytes
## [1] "DCT" "MLANA"
##
## $sebocytes
## [1] "CLMP" "PPARG"
We load metadata for this sample :
sample_info = readRDS(paste0(out_dir, "/../1_metadata/wu_sample_info.rds"))
sample_info %>%
dplyr::filter(project_name == sample_name)
## project_name sample_type sample_identifier platform gender location
## 1 F31B HD Wu_HD_2 10X F hair scalp
## laboratory color
## 1 Wu slateblue
These is a parameter for different functions :
cl = aquarius::create_parallel_instance(nthreads = 3L)
cut_log_nCount_RNA = 6
cut_nFeature_RNA = 500
cut_percent.mt = 20
cut_percent.rb = 50
In this section, we load the raw count matrix. Then, we applied an empty droplets filtering.
sobj = aquarius::load_sc_data(data_path = count_matrix_dir,
sample_name = sample_name,
my_seed = 1337L)
## [1] 27955 6794880
## [1] 123364408
## [1] 27955 77493
## [1] 121529265
## [1] 0.9851242
sobj
## An object of class Seurat
## 27955 features across 77493 samples within 1 assay
## Active assay: RNA (27955 features, 0 variable features)
(Time to run : 171.02 s)
We set an upper bound to 20,000 cells. Remaining cells correspond to ambient RNA.
if (ncol(sobj) > 20000) {
sobj$nb_UMI = Seurat::GetAssayData(sobj, assay = "RNA", slot = "counts") %>%
Matrix::colSums()
thresh_20k = sort(sobj$nb_UMI, decreasing = TRUE)[20000]
sobj = subset(sobj, nb_UMI >= thresh_20k)
sobj$nb_UMI = NULL
sobj
}
## An object of class Seurat
## 27955 features across 20135 samples within 1 assay
## Active assay: RNA (27955 features, 0 variable features)
In genes metadata, we add the Ensembl ID. The
sobj@assays$RNA@meta.features dataframe contains three
information :
rownames : gene names stored as the dimnames of the
count matrix. Duplicated gene names will have a .1 at the
end of their nameEnsembl_ID : EnsemblID, as stored in the
features.tsv.gz filegene_name : gene_name, as stored in the
features.tsv.gz file. Duplicated gene names will have the
same name.features_df = read.csv(paste0(count_matrix_dir, "/features.tsv.gz"), sep = "\t", header = 0)
features_df = features_df[, c(1:2)]
colnames(features_df) = c("Ensembl_ID", "gene_name")
rownames(features_df) = rownames(sobj) # mandatory for Seurat::FindVariableFeatures
sobj@assays$RNA@meta.features = features_df
rm(features_df)
head(sobj@assays$RNA@meta.features)
## Ensembl_ID gene_name
## MIR1302-2HG ENSG00000243485 MIR1302-2HG
## FAM138A ENSG00000237613 FAM138A
## OR4F5 ENSG00000186092 OR4F5
## AL627309.1 ENSG00000238009 AL627309.1
## AL627309.3 ENSG00000239945 AL627309.3
## AL627309.4 ENSG00000241599 AL627309.4
We add the same columns as in metadata :
row_oi = (sample_info$project_name == sample_name)
sobj$project_name = sample_name
sobj$sample_identifier = sample_info[row_oi, "sample_identifier"]
sobj$sample_type = sample_info[row_oi, "sample_type"]
sobj$location = sample_info[row_oi, "location"]
sobj$laboratory = sample_info[row_oi, "laboratory"]
colnames(sobj@meta.data)
## [1] "orig.ident" "nCount_RNA" "nFeature_RNA"
## [4] "log_nCount_RNA" "project_name" "sample_identifier"
## [7] "sample_type" "location" "laboratory"
sobj = Seurat::NormalizeData(sobj,
normalization.method = "LogNormalize",
assay = "RNA")
sobj = Seurat::FindVariableFeatures(sobj,
assay = "RNA",
nfeatures = 3000)
sobj
## An object of class Seurat
## 27955 features across 20135 samples within 1 assay
## Active assay: RNA (27955 features, 3000 variable features)
We generate a tSNE to visualize cells before filtering.
sobj = aquarius::dimensions_reduction(sobj = sobj,
assay = "RNA",
reduction = "pca",
max_dims = 100,
verbose = FALSE)
Seurat::ElbowPlot(sobj, ndims = 100, reduction = "RNA_pca")
We generate a tSNE with 20 principal components :
ndims = 20
sobj = Seurat::RunTSNE(sobj,
reduction = "RNA_pca",
dims = 1:ndims,
seed.use = 1337L,
reduction.name = paste0("RNA_pca_", ndims, "_tsne"))
sobj
## An object of class Seurat
## 27955 features across 20135 samples within 1 assay
## Active assay: RNA (27955 features, 3000 variable features)
## 2 dimensional reductions calculated: RNA_pca, RNA_pca_20_tsne
We annotate cells for cell type using
Seurat::AddModuleScore function.
sobj = aquarius::cell_annot_custom(sobj,
newname = "cell_type",
markers = cell_markers,
use_negative = TRUE,
add_score = TRUE,
verbose = TRUE)
colnames(sobj@meta.data) = stringr::str_replace_all(string = colnames(sobj@meta.data),
pattern = " ",
replacement = "_")
sobj$cell_type = factor(sobj$cell_type, levels = names(cell_markers))
table(sobj$cell_type)
##
## CD4 T cells CD8 T cells Langerhans cells
## 11 4 35
## macrophages B cells cuticle
## 18 126 2484
## cortex medulla IRS
## 6802 902 1549
## proliferative ORS IFE basal
## 435 5304 230
## IFE granular spinous HF-SCs melanocytes
## 1263 201 344
## sebocytes
## 427
(Time to run : 59.64 s)
To justify cell type annotation, we can make a dotplot :
markers = c("PTPRC", "MSX2", "KRT16",
unique(unlist(dotplot_markers[levels(sobj$cell_type)])))
markers = markers[markers %in% rownames(sobj)]
aquarius::plot_dotplot(sobj, assay = "RNA",
column_name = "cell_type",
markers = markers,
nb_hline = 0) +
ggplot2::scale_color_gradientn(colors = aquarius:::color_gene) +
ggplot2::theme(legend.position = "right",
legend.box = "vertical",
legend.direction = "vertical",
axis.title = element_blank(),
axis.text = element_text(size = 15))
We can make a barplot to see the composition of each dataset, and visualize cell types on the projection.
df_proportion = as.data.frame(prop.table(table(sobj$orig.ident,
sobj$cell_type)))
colnames(df_proportion) = c("orig.ident", "cell_type", "freq")
quantif = table(sobj$orig.ident) %>%
as.data.frame.table() %>%
`colnames<-`(c("orig.ident", "nb_cells"))
# Plot
plot_list = list()
plot_list[[2]] = aquarius::plot_barplot(df = df_proportion,
x = "orig.ident",
y = "freq",
fill = "cell_type",
position = ggplot2::position_fill()) +
ggplot2::scale_fill_manual(name = "Cell type",
values = color_markers[levels(df_proportion$cell_type)],
breaks = levels(df_proportion$cell_type)) +
ggplot2::geom_label(data = quantif, inherit.aes = FALSE,
aes(x = orig.ident, y = 1.05, label = nb_cells),
label.size = 0)
plot_list[[1]] = Seurat::DimPlot(sobj, group.by = "cell_type") +
ggplot2::scale_color_manual(values = unlist(color_markers),
breaks = names(color_markers)) +
ggplot2::labs(title = sample_name,
subtitle = paste0(ncol(sobj), " cells")) +
Seurat::NoLegend() + Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
patchwork::wrap_plots(plot_list, nrow = 1, widths = c(6, 1))
We annotate cells for cell cycle phase using Seurat and
cyclone.
cc_columns = aquarius::add_cell_cycle(sobj = sobj,
assay = "RNA",
species_rdx = "hs",
BPPARAM = cl)@meta.data[, c("Seurat.Phase", "Phase")]
##
## G1 G2M S
## 14231 2336 3568
sobj$Seurat.Phase = cc_columns$Seurat.Phase
sobj$cyclone.Phase = cc_columns$Phase
table(sobj$Seurat.Phase, sobj$cyclone.Phase)
##
## G1 G2M S
## G1 6038 827 1800
## G2M 4638 1141 990
## S 3555 368 778
(Time to run : 1185.68 s)
We visualize cell cycle on the projection :
plot_list = list()
plot_list[[2]] = Seurat::DimPlot(sobj, group.by = "Seurat.Phase") +
ggplot2::labs(title = "Cell Cycle Phase",
subtitle = "Seurat.Phase") +
Seurat::NoLegend() + Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
plot_list[[1]] = Seurat::DimPlot(sobj, group.by = "cyclone.Phase") +
ggplot2::labs(title = "Cell Cycle Phase",
subtitle = "cyclone.Phase") +
Seurat::NoLegend() + Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
patchwork::wrap_plots(plot_list, nrow = 1)
In this section, we look at the number of genes expressed by each cell, the number of UMI, the percentage of mitochondrial genes expressed, and the percentage of ribosomal genes expressed. Then, without taking into account the cells expressing low number of genes or have low number of UMI, we identify doublet cells.
We compute four quality metrics :
sobj = Seurat::PercentageFeatureSet(sobj, pattern = "^MT", col.name = "percent.mt")
sobj = Seurat::PercentageFeatureSet(sobj, pattern = "^RP[L|S][0-9]*$", col.name = "percent.rb")
head(sobj@meta.data)
## orig.ident nCount_RNA nFeature_RNA log_nCount_RNA
## AAACCCAAGAAGGGAT-1 F31B 36397 5126 10.502242
## AAACCCAAGCCAAGGT-1 F31B 636 342 6.455199
## AAACCCAAGCTCCGAC-1 F31B 555 338 6.318968
## AAACCCAAGCTGCGAA-1 F31B 544 350 6.298949
## AAACCCAAGCTTTCCC-1 F31B 566 353 6.338594
## AAACCCAAGGTCGCCT-1 F31B 792 484 6.674561
## project_name sample_identifier sample_type location
## AAACCCAAGAAGGGAT-1 F31B Wu_HD_2 HD hair scalp
## AAACCCAAGCCAAGGT-1 F31B Wu_HD_2 HD hair scalp
## AAACCCAAGCTCCGAC-1 F31B Wu_HD_2 HD hair scalp
## AAACCCAAGCTGCGAA-1 F31B Wu_HD_2 HD hair scalp
## AAACCCAAGCTTTCCC-1 F31B Wu_HD_2 HD hair scalp
## AAACCCAAGGTCGCCT-1 F31B Wu_HD_2 HD hair scalp
## laboratory score_CD4_T_cells score_CD8_T_cells
## AAACCCAAGAAGGGAT-1 Wu -0.011988469 -0.003625960
## AAACCCAAGCCAAGGT-1 Wu 0.000000000 0.000000000
## AAACCCAAGCTCCGAC-1 Wu -0.004824548 -0.002315556
## AAACCCAAGCTGCGAA-1 Wu -0.002427816 -0.002330474
## AAACCCAAGCTTTCCC-1 Wu -0.002397054 0.000000000
## AAACCCAAGGTCGCCT-1 Wu -0.002139229 0.000000000
## score_Langerhans_cells score_macrophages score_B_cells
## AAACCCAAGAAGGGAT-1 -0.018971118 -0.014439002 -0.004262379
## AAACCCAAGCCAAGGT-1 0.000000000 -0.003045190 0.000000000
## AAACCCAAGCTCCGAC-1 0.000000000 -0.009552606 -0.001900250
## AAACCCAAGCTGCGAA-1 -0.003859848 0.000000000 -0.001912492
## AAACCCAAGCTTTCCC-1 -0.003810941 0.000000000 0.000000000
## AAACCCAAGGTCGCCT-1 -0.010203121 -0.011295131 0.000000000
## score_cuticle score_cortex score_medulla score_IRS
## AAACCCAAGAAGGGAT-1 0.4271370 0.06904114 -0.1596649 -0.6366430
## AAACCCAAGCCAAGGT-1 0.1409309 0.57187683 0.5386603 0.5563725
## AAACCCAAGCTCCGAC-1 0.2117394 0.64974727 0.1171012 0.2163121
## AAACCCAAGCTGCGAA-1 0.2718378 0.46986875 0.1362155 0.1892949
## AAACCCAAGCTTTCCC-1 0.5676369 0.89458440 0.1314865 0.3666754
## AAACCCAAGGTCGCCT-1 0.2212768 0.13689049 0.1760009 0.6300422
## score_proliferative score_ORS score_IFE_basal
## AAACCCAAGAAGGGAT-1 -0.07698009 0.30451667 -0.02855556
## AAACCCAAGCCAAGGT-1 0.14709367 -0.23960574 -0.07742401
## AAACCCAAGCTCCGAC-1 -0.01563200 -0.08061869 -0.10947570
## AAACCCAAGCTGCGAA-1 -0.02720522 0.42334484 -0.11220827
## AAACCCAAGCTTTCCC-1 -0.01699434 -0.04544535 -0.11238948
## AAACCCAAGGTCGCCT-1 0.13342507 0.42245975 -0.14229040
## score_IFE_granular_spinous score_HF-SCs score_melanocytes
## AAACCCAAGAAGGGAT-1 -0.4743133 -0.053820275 -0.3059911
## AAACCCAAGCCAAGGT-1 -0.2928820 0.024924172 0.2237727
## AAACCCAAGCTCCGAC-1 0.2010028 -0.156008362 -0.1356226
## AAACCCAAGCTGCGAA-1 -0.1149472 0.178459426 -0.1531818
## AAACCCAAGCTTTCCC-1 0.0158392 -0.160285759 -0.1387164
## AAACCCAAGGTCGCCT-1 -0.2225996 0.004949276 -0.1747036
## score_sebocytes cell_type Seurat.Phase cyclone.Phase
## AAACCCAAGAAGGGAT-1 -0.02445698 cuticle G1 G1
## AAACCCAAGCCAAGGT-1 -0.05470910 medulla G2M G1
## AAACCCAAGCTCCGAC-1 -0.11491697 cortex G1 G1
## AAACCCAAGCTGCGAA-1 -0.09239270 ORS G2M G1
## AAACCCAAGCTTTCCC-1 -0.06561361 cuticle G2M G1
## AAACCCAAGGTCGCCT-1 -0.10159740 ORS G1 S
## percent.mt percent.rb
## AAACCCAAGAAGGGAT-1 10.308542 21.72707
## AAACCCAAGCCAAGGT-1 15.723270 24.21384
## AAACCCAAGCTCCGAC-1 7.567568 22.16216
## AAACCCAAGCTGCGAA-1 2.022059 24.08088
## AAACCCAAGCTTTCCC-1 5.830389 22.79152
## AAACCCAAGGTCGCCT-1 2.651515 19.57071
We get the cell barcodes for the failing cells :
fail_percent.mt = sobj@meta.data %>% dplyr::filter(percent.mt > cut_percent.mt) %>% rownames()
fail_percent.rb = sobj@meta.data %>% dplyr::filter(percent.rb > cut_percent.rb) %>% rownames()
fail_log_nCount_RNA = sobj@meta.data %>% dplyr::filter(log_nCount_RNA < cut_log_nCount_RNA) %>% rownames()
fail_nFeature_RNA = sobj@meta.data %>% dplyr::filter(nFeature_RNA < cut_nFeature_RNA) %>% rownames()
Without taking into account the low UMI and low number of features cells, we identify doublets.
fsobj = subset(sobj, invert = TRUE,
cells = unique(c(fail_log_nCount_RNA, fail_nFeature_RNA)))
fsobj
## An object of class Seurat
## 27955 features across 8298 samples within 1 assay
## Active assay: RNA (27955 features, 3000 variable features)
## 2 dimensional reductions calculated: RNA_pca, RNA_pca_20_tsne
On this filtered dataset, we apply doublet cells detection. Just before, we run the normalization, taking into account only the remaining cells.
fsobj = Seurat::NormalizeData(fsobj,
normalization.method = "LogNormalize",
assay = "RNA")
fsobj = Seurat::FindVariableFeatures(fsobj,
assay = "RNA",
nfeatures = 3000)
fsobj
## An object of class Seurat
## 27955 features across 8298 samples within 1 assay
## Active assay: RNA (27955 features, 3000 variable features)
## 2 dimensional reductions calculated: RNA_pca, RNA_pca_20_tsne
We identify doublet cells :
fsobj = aquarius::find_doublets(sobj = fsobj,
BPPARAM = cl)
## [1] 27955 8298
##
## FALSE TRUE
## 7081 1217
## [14:56:17] WARNING: amalgamation/../src/learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
##
## FALSE TRUE
## 7572 726
##
## FALSE TRUE
## 6861 1437
fail_doublets_consensus = Seurat::WhichCells(fsobj, expression = doublets_consensus.class)
fail_doublets_scDblFinder = Seurat::WhichCells(fsobj, expression = scDblFinder.class)
fail_doublets_hybrid = Seurat::WhichCells(fsobj, expression = hybrid_score.class)
(Time to run : 222.8 s)
We add the information in the non filtered Seurat object :
sobj$doublets_consensus.class = dplyr::case_when(!(colnames(sobj) %in% colnames(fsobj)) ~ NA,
colnames(sobj) %in% fail_doublets_consensus ~ TRUE,
!(colnames(sobj) %in% fail_doublets_consensus) ~ FALSE)
sobj$scDblFinder.class = dplyr::case_when(!(colnames(sobj) %in% colnames(fsobj)) ~ NA,
colnames(sobj) %in% fail_doublets_scDblFinder ~ TRUE,
!(colnames(sobj) %in% fail_doublets_scDblFinder) ~ FALSE)
sobj$hybrid_score.class = dplyr::case_when(!(colnames(sobj) %in% colnames(fsobj)) ~ NA,
colnames(sobj) %in% fail_doublets_hybrid ~ TRUE,
!(colnames(sobj) %in% fail_doublets_hybrid) ~ FALSE)
We can visualize the 4 cells quality with a Venn diagram :
n_filtered = c(fail_percent.mt, fail_percent.rb, fail_log_nCount_RNA, fail_nFeature_RNA) %>%
unique() %>% length()
percent_filtered = round(100*(n_filtered/ncol(sobj)), 2)
ggvenn::ggvenn(list(percent.mt = fail_percent.mt,
percent.rb = fail_percent.rb,
log_nCount_RNA = fail_log_nCount_RNA,
nFeature_RNA = fail_nFeature_RNA),
fill_color = c("#0073C2FF", "#EFC000FF", "orange", "pink"),
stroke_size = 0.5, set_name_size = 4) +
ggplot2::labs(title = "Filtered out cells",
subtitle = paste0(n_filtered, " cells (", percent_filtered, " % of all cells)")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
To visualize the threshold for number of UMI, we can make a histogram :
aquarius::plot_qc_density(df = sobj@meta.data,
x = "log_nCount_RNA",
bins = 200,
group_by = "orig.ident",
group_color = setNames(sample_info$color,
nm = sample_info$sample_identifiant),
x_thresh = cut_log_nCount_RNA)
Seurat::VlnPlot(sobj, features = "log_nCount_RNA", pt.size = 0.001,
group.by = "cell_type", cols = color_markers) +
ggplot2::scale_fill_manual(values = color_markers, breaks = names(color_markers)) +
ggplot2::geom_hline(yintercept = cut_log_nCount_RNA, col = "red") +
ggplot2::labs(x = "")
sobj$fail = ifelse(colnames(sobj) %in% fail_log_nCount_RNA,
yes = as.character(sobj$cell_type), no = NA)
sobj$fail = factor(sobj$fail, levels = c(levels(sobj$cell_type), NA))
Seurat::DimPlot(sobj, group.by = "fail", na.value = "gray80", cols = color_markers) +
ggplot2::labs(title = "log_nCount_RNA",
subtitle = paste0(length(fail_log_nCount_RNA), " cells")) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
To visualize the threshold for number of features, we can make a histogram :
aquarius::plot_qc_density(df = sobj@meta.data,
x = "nFeature_RNA",
bins = 200,
group_by = "orig.ident",
group_color = setNames(sample_info$color,
nm = sample_info$sample_identifiant),
x_thresh = cut_nFeature_RNA)
Seurat::VlnPlot(sobj, features = "nFeature_RNA", pt.size = 0.001,
group.by = "cell_type", cols = color_markers) +
ggplot2::scale_fill_manual(values = color_markers, breaks = names(color_markers)) +
ggplot2::geom_hline(yintercept = cut_nFeature_RNA, col = "red") +
ggplot2::labs(x = "")
sobj$fail = ifelse(colnames(sobj) %in% fail_nFeature_RNA,
yes = as.character(sobj$cell_type), no = NA)
sobj$fail = factor(sobj$fail, levels = c(levels(sobj$cell_type), NA))
Seurat::DimPlot(sobj, group.by = "fail", na.value = "gray80", cols = color_markers) +
ggplot2::labs(title = "nFeature_RNA",
subtitle = paste0(length(fail_nFeature_RNA), " cells")) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
To identify a threshold for mitochondrial gene expression, we can make a histogram :
aquarius::plot_qc_density(df = sobj@meta.data,
x = "percent.mt",
bins = 200,
group_by = "orig.ident",
group_color = setNames(sample_info$color,
nm = sample_info$sample_identifiant),
x_thresh = cut_percent.mt)
Seurat::VlnPlot(sobj, features = "percent.mt", pt.size = 0.001,
group.by = "cell_type", cols = color_markers) +
ggplot2::scale_fill_manual(values = color_markers, breaks = names(color_markers)) +
ggplot2::geom_hline(yintercept = cut_percent.mt, col = "red") +
ggplot2::labs(x = "")
sobj$fail = ifelse(colnames(sobj) %in% fail_percent.mt,
yes = as.character(sobj$cell_type), no = NA)
sobj$fail = factor(sobj$fail, levels = c(levels(sobj$cell_type), NA))
Seurat::DimPlot(sobj, group.by = "fail", na.value = "gray80", cols = color_markers) +
ggplot2::labs(title = "percent.mt",
subtitle = paste0(length(fail_percent.mt), " cells")) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
To identify a threshold for ribosomal gene expression, we can make a histogram :
aquarius::plot_qc_density(df = sobj@meta.data,
x = "percent.rb",
bins = 200,
group_by = "orig.ident",
group_color = setNames(sample_info$color,
nm = sample_info$sample_identifiant),
x_thresh = cut_percent.rb)
Seurat::VlnPlot(sobj, features = "percent.rb", pt.size = 0.001,
group.by = "cell_type", cols = color_markers) +
ggplot2::scale_fill_manual(values = color_markers, breaks = names(color_markers)) +
ggplot2::geom_hline(yintercept = cut_percent.rb, col = "red") +
ggplot2::labs(x = "")
sobj$fail = ifelse(colnames(sobj) %in% fail_percent.rb,
yes = as.character(sobj$cell_type), no = NA)
sobj$fail = factor(sobj$fail, levels = c(levels(sobj$cell_type), NA))
Seurat::DimPlot(sobj, group.by = "fail", na.value = "gray80", cols = color_markers) +
ggplot2::labs(title = "percent.rb",
subtitle = paste0(length(fail_percent.rb), " cells")) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
We would like to see if the number of feature expressed by cell, and
the number of UMI is correlated with the cell type, the percentage of
mitochondrial and ribosomal gene expressed, and the doublet status. We
build the log_nCount_RNA by nFeature_RNA
figure, where cells (dots) are colored by these different metrics.
This is the figure, colored by cell type :
aquarius::plot_qc_facslike(df = sobj@meta.data,
x = "nFeature_RNA",
y = "log_nCount_RNA",
col_by = "cell_type",
col_colors = unname(color_markers),
x_thresh = cut_nFeature_RNA,
y_thresh = cut_log_nCount_RNA,
bins = 200)
This is the figure, colored by the percentage of mitochondrial genes expressed in cell :
aquarius::plot_qc_facslike(df = sobj@meta.data,
x = "nFeature_RNA",
y = "log_nCount_RNA",
col_by = "percent.mt",
x_thresh = cut_nFeature_RNA,
y_thresh = cut_log_nCount_RNA,
bins = 200)
This is the figure, colored by the percentage of ribosomal genes expressed in cell :
aquarius::plot_qc_facslike(df = sobj@meta.data,
x = "nFeature_RNA",
y = "log_nCount_RNA",
col_by = "percent.rb",
x_thresh = cut_nFeature_RNA,
y_thresh = cut_log_nCount_RNA,
bins = 200)
This is the figure, colored by the doublet cells status
(doublets_consensus.class) :
aquarius::plot_qc_facslike(df = sobj@meta.data,
x = "nFeature_RNA",
y = "log_nCount_RNA",
col_by = "doublets_consensus.class",
col_colors = setNames(nm = c(TRUE, FALSE),
aquarius::gg_color_hue(2)),
x_thresh = cut_nFeature_RNA,
y_thresh = cut_log_nCount_RNA,
bins = 200)
This is the figure, colored by the doublet cells status
(scDblFinder.class) :
aquarius::plot_qc_facslike(df = sobj@meta.data,
x = "nFeature_RNA",
y = "log_nCount_RNA",
col_by = "scDblFinder.class",
col_colors = setNames(nm = c(TRUE, FALSE),
aquarius::gg_color_hue(2)),
x_thresh = cut_nFeature_RNA,
y_thresh = cut_log_nCount_RNA,
bins = 200)
This is the figure, colored by the doublet cells status
(hybrid_score.class) :
aquarius::plot_qc_facslike(df = sobj@meta.data,
x = "nFeature_RNA",
y = "log_nCount_RNA",
col_by = "hybrid_score.class",
col_colors = setNames(nm = c(TRUE, FALSE),
aquarius::gg_color_hue(2)),
x_thresh = cut_nFeature_RNA,
y_thresh = cut_log_nCount_RNA,
bins = 200)
Do filtered cells belong to a particular cell type ?
sobj$all_cells = TRUE
plot_list = list()
## All cells
df = sobj@meta.data
if (nrow(df) == 0) {
plot_list[[1]] = ggplot()
} else {
plot_list[[1]] = aquarius::plot_piechart(df = df,
logical_var = "all_cells",
grouping_var = "cell_type",
colors = color_markers,
display_legend = TRUE) +
ggplot2::labs(title = "All cells",
subtitle = paste(nrow(df), "cells")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
}
## Doublets consensus
df = sobj@meta.data %>%
dplyr::filter(doublets_consensus.class)
if (nrow(df) == 0) {
plot_list[[2]] = ggplot()
} else {
plot_list[[2]] = aquarius::plot_piechart(df = df,
logical_var = "all_cells",
grouping_var = "cell_type",
colors = color_markers,
display_legend = TRUE) +
ggplot2::labs(title = "doublets_consensus.class",
subtitle = paste(sum(sobj$doublets_consensus.class, na.rm = TRUE), "cells")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
}
## percent.mt
df = sobj@meta.data %>%
dplyr::filter(percent.mt > cut_percent.mt)
if (nrow(df) == 0) {
plot_list[[3]] = ggplot()
} else {
plot_list[[3]] = aquarius::plot_piechart(df = df,
logical_var = "all_cells",
grouping_var = "cell_type",
colors = color_markers,
display_legend = TRUE) +
ggplot2::labs(title = paste("percent.mt >", cut_percent.mt),
subtitle = paste(length(fail_percent.mt), "cells")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
}
## percent.rb
df = sobj@meta.data %>%
dplyr::filter(percent.rb > cut_percent.rb)
if (nrow(df) == 0) {
plot_list[[4]] = ggplot()
} else {
plot_list[[4]] = aquarius::plot_piechart(df = df,
logical_var = "all_cells",
grouping_var = "cell_type",
colors = color_markers,
display_legend = TRUE) +
ggplot2::labs(title = paste("percent.rb >", cut_percent.rb),
subtitle = paste(length(fail_percent.rb), "cells")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
}
## log_nCount_RNA
df = sobj@meta.data %>%
dplyr::filter(log_nCount_RNA < cut_log_nCount_RNA)
if (nrow(df) == 0) {
plot_list[[5]] = ggplot()
} else {
plot_list[[5]] = aquarius::plot_piechart(df = df,
logical_var = "all_cells",
grouping_var = "cell_type",
colors = color_markers,
display_legend = TRUE) +
ggplot2::labs(title = paste("log_nCount_RNA <", round(cut_log_nCount_RNA, 2)),
subtitle = paste(length(fail_log_nCount_RNA), "cells")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
}
## nFeature_RNA
df = sobj@meta.data %>%
dplyr::filter(nFeature_RNA < cut_nFeature_RNA)
if (nrow(df) == 0) {
plot_list[[6]] = ggplot()
} else {
plot_list[[6]] = aquarius::plot_piechart(df = df,
logical_var = "all_cells",
grouping_var = "cell_type",
colors = color_markers,
display_legend = TRUE) +
ggplot2::labs(title = paste("nFeature_RNA <", round(cut_nFeature_RNA, 2)),
subtitle = paste(length(fail_nFeature_RNA), "cells")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
}
patchwork::wrap_plots(plot_list, ncol = 3) +
patchwork::plot_layout(guides = "collect") &
ggplot2::theme(legend.position = "right")
We can compare doublet detection methods with a Venn diagram :
ggvenn::ggvenn(list(hybrid = fail_doublets_hybrid,
scDblFinder = fail_doublets_scDblFinder),
fill_color = c("#0073C2FF", "#EFC000FF"),
stroke_size = 0.5, set_name_size = 4) +
ggplot2::ggtitle(label = "Doublet cells") +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"))
We visualize cells annotation for doublets :
plot_list = list()
# scDblFinder.class
sobj$fail = ifelse(sobj$scDblFinder.class,
yes = as.character(sobj$cell_type), no = NA)
sobj$fail = factor(sobj$fail, levels = c(levels(sobj$cell_type), NA))
plot_list[[1]] = Seurat::DimPlot(sobj, group.by = "fail",
na.value = "gray80", cols = color_markers) +
ggplot2::labs(title = "scDblFinder.class",
subtitle = paste0(sum(sobj$scDblFinder.class, na.rm = TRUE), " cells")) +
Seurat::NoAxes() + Seurat::NoLegend() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
# hybrid_score.class
sobj$fail = ifelse(sobj$hybrid_score.class,
yes = as.character(sobj$cell_type), no = NA)
sobj$fail = factor(sobj$fail, levels = c(levels(sobj$cell_type), NA))
plot_list[[2]] = Seurat::DimPlot(sobj, group.by = "fail",
na.value = "gray80", cols = color_markers) +
ggplot2::labs(title = "hybrid_score.class",
subtitle = paste0(sum(sobj$hybrid_score.class, na.rm = TRUE), " cells")) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
sobj$fail = NULL
# Plot
patchwork::wrap_plots(plot_list, nrow = 1)
We could save this object before filtering (remove
eval = FALSE) :
saveRDS(sobj, paste0(out_dir, "/datasets/", sample_name, "_sobj_unfiltered.rds"))
We remove :
sobj = subset(sobj, invert = TRUE,
cells = unique(c(fail_log_nCount_RNA, fail_nFeature_RNA,
fail_percent.mt, fail_percent.rb,
fail_doublets_consensus)))
sobj
## An object of class Seurat
## 27955 features across 4786 samples within 1 assay
## Active assay: RNA (27955 features, 3000 variable features)
## 2 dimensional reductions calculated: RNA_pca, RNA_pca_20_tsne
We normalize the count matrix for remaining cells :
sobj = Seurat::NormalizeData(sobj,
normalization.method = "LogNormalize",
assay = "RNA")
sobj = Seurat::FindVariableFeatures(sobj,
assay = "RNA",
nfeatures = 3000)
sobj
## An object of class Seurat
## 27955 features across 4786 samples within 1 assay
## Active assay: RNA (27955 features, 3000 variable features)
## 2 dimensional reductions calculated: RNA_pca, RNA_pca_20_tsne
We perform a PCA :
sobj = aquarius::dimensions_reduction(sobj = sobj,
assay = "RNA",
reduction = "pca",
max_dims = 100,
verbose = FALSE)
Seurat::ElbowPlot(sobj, ndims = 100, reduction = "RNA_pca")
We generate a tSNE and a UMAP with 20 principal components :
ndims = 20
sobj = Seurat::RunTSNE(sobj,
reduction = "RNA_pca",
dims = 1:ndims,
seed.use = 1337L,
reduction.name = paste0("RNA_pca_", ndims, "_tsne"))
sobj = Seurat::RunUMAP(sobj,
reduction = "RNA_pca",
dims = 1:ndims,
seed.use = 1337L,
reduction.name = paste0("RNA_pca_", ndims, "_umap"))
We annotate cells for cell type, with the new normalized expression matrix :
score_columns = grep(x = colnames(sobj@meta.data), pattern = "^score", value = TRUE)
sobj@meta.data[, score_columns] = NULL
sobj$cell_type = NULL
sobj = aquarius::cell_annot_custom(sobj,
newname = "cell_type",
markers = cell_markers,
use_negative = TRUE,
add_score = TRUE,
verbose = TRUE)
sobj$cell_type = factor(sobj$cell_type, levels = names(cell_markers))
colnames(sobj@meta.data) = stringr::str_replace_all(string = colnames(sobj@meta.data),
pattern = " ",
replacement = "_")
table(sobj$cell_type)
##
## CD4 T cells CD8 T cells Langerhans cells
## 5 2 8
## macrophages B cells cuticle
## 13 21 422
## cortex medulla IRS
## 605 194 171
## proliferative ORS IFE basal
## 202 2196 76
## IFE granular spinous HF-SCs melanocytes
## 597 72 139
## sebocytes
## 63
(Time to run : 26.41 s)
To justify cell type annotation, we can make a dotplot :
markers = c("PTPRC", unique(unlist(dotplot_markers[levels(sobj$cell_type)])))
markers = markers[markers %in% rownames(sobj)]
aquarius::plot_dotplot(sobj, assay = "RNA",
column_name = "cell_type",
markers = markers,
nb_hline = 0) +
ggplot2::scale_color_gradientn(colors = aquarius:::color_gene) +
ggplot2::theme(legend.position = "right",
legend.box = "vertical",
legend.direction = "vertical",
axis.title = element_blank(),
axis.text = element_text(size = 15))
We can make a barplot to see the composition of each dataset, and visualize cell types on the projection.
df_proportion = as.data.frame(prop.table(table(sobj$orig.ident,
sobj$cell_type)))
colnames(df_proportion) = c("orig.ident", "cell_type", "freq")
quantif = table(sobj$orig.ident) %>%
as.data.frame.table() %>%
`colnames<-`(c("orig.ident", "nb_cells"))
# Plot
plot_list = list()
plot_list[[2]] = aquarius::plot_barplot(df = df_proportion,
x = "orig.ident",
y = "freq",
fill = "cell_type",
position = ggplot2::position_fill()) +
ggplot2::scale_fill_manual(name = "Cell type",
values = color_markers[levels(df_proportion$cell_type)],
breaks = levels(df_proportion$cell_type)) +
ggplot2::geom_label(data = quantif, inherit.aes = FALSE,
aes(x = orig.ident, y = 1.05, label = nb_cells),
label.size = 0)
plot_list[[1]] = Seurat::DimPlot(sobj, group.by = "cell_type",
reduction = "RNA_pca_20_tsne") +
ggplot2::scale_color_manual(values = unlist(color_markers),
breaks = names(color_markers)) +
ggplot2::labs(title = sample_name,
subtitle = paste0(ncol(sobj), " cells")) +
Seurat::NoLegend() + Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
patchwork::wrap_plots(plot_list, nrow = 1, widths = c(6, 1))
We annotate cells for cell cycle phase :
cc_columns = aquarius::add_cell_cycle(sobj = sobj,
assay = "RNA",
species_rdx = "hs",
BPPARAM = cl)@meta.data[, c("Seurat.Phase", "Phase")]
##
## G1 G2M S
## 4052 231 503
sobj$Seurat.Phase = cc_columns$Seurat.Phase
sobj$cyclone.Phase = cc_columns$Phase
table(sobj$Seurat.Phase, sobj$cyclone.Phase)
##
## G1 G2M S
## G1 2174 72 261
## G2M 846 114 119
## S 1032 45 123
(Time to run : 406.31 s)
We visualize cell cycle on the projection :
plot_list = list()
plot_list[[2]] = Seurat::DimPlot(sobj, group.by = "Seurat.Phase",
reduction = "RNA_pca_20_tsne") +
ggplot2::labs(title = "Cell Cycle Phase",
subtitle = "Seurat.Phase") +
Seurat::NoLegend() + Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
plot_list[[1]] = Seurat::DimPlot(sobj, group.by = "cyclone.Phase",
reduction = "RNA_pca_20_tsne") +
ggplot2::labs(title = "Cell Cycle Phase",
subtitle = "cyclone.Phase") +
Seurat::NoLegend() + Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
patchwork::wrap_plots(plot_list, nrow = 1)
We make a highly resolutive clustering :
sobj = Seurat::FindNeighbors(sobj, reduction = "RNA_pca", dims = c(1:ndims))
sobj = Seurat::FindClusters(sobj, resolution = 2)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4786
## Number of edges: 165854
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7916
## Number of communities: 29
## Elapsed time: 0 seconds
table(sobj$seurat_clusters)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
## 540 539 376 331 326 243 220 211 194 169 168 161 155 127 127 122 117 110 103 96
## 20 21 22 23 24 25 26 27 28
## 63 55 52 43 42 34 32 16 14
We can visualize the cell type :
tsne = Seurat::DimPlot(sobj, group.by = "cell_type",
reduction = paste0("RNA_pca_", ndims, "_tsne"), cols = color_markers) +
Seurat::NoAxes() + ggplot2::ggtitle("tSNE") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
legend.position = "none")
umap = Seurat::DimPlot(sobj, group.by = "cell_type",
reduction = paste0("RNA_pca_", ndims, "_umap"), cols = color_markers) +
Seurat::NoAxes() + ggplot2::ggtitle("UMAP") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
tsne | umap
We can visualize the cell cycle, from Seurat :
tsne = Seurat::DimPlot(sobj, group.by = "Seurat.Phase",
reduction = paste0("RNA_pca_", ndims, "_tsne")) +
Seurat::NoAxes() + ggplot2::ggtitle("tSNE") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
legend.position = "none")
umap = Seurat::DimPlot(sobj, group.by = "Seurat.Phase",
reduction = paste0("RNA_pca_", ndims, "_umap")) +
Seurat::NoAxes() + ggplot2::ggtitle("UMAP") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
tsne | umap
We can visualize the cell cycle, from cyclone :
tsne = Seurat::DimPlot(sobj, group.by = "cyclone.Phase",
reduction = paste0("RNA_pca_", ndims, "_tsne")) +
Seurat::NoAxes() + ggplot2::ggtitle("tSNE") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
legend.position = "none")
umap = Seurat::DimPlot(sobj, group.by = "cyclone.Phase",
reduction = paste0("RNA_pca_", ndims, "_umap")) +
Seurat::NoAxes() + ggplot2::ggtitle("UMAP") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
tsne | umap
We visualize the clustering :
tsne = Seurat::DimPlot(sobj, group.by = "seurat_clusters", label = TRUE,
reduction = paste0("RNA_pca_", ndims, "_tsne")) +
Seurat::NoAxes() + ggplot2::ggtitle("tSNE") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
legend.position = "none")
umap = Seurat::DimPlot(sobj, group.by = "seurat_clusters", label = TRUE,
reduction = paste0("RNA_pca_", ndims, "_umap")) +
Seurat::NoAxes() + ggplot2::ggtitle("UMAP") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
tsne | umap
We visualize all cell types markers on the tSNE :
markers = dotplot_markers %>% unlist() %>% unname()
markers = markers[markers %in% rownames(sobj)]
plot_list = lapply(markers,
FUN = function(one_gene) {
p = Seurat::FeaturePlot(sobj, features = one_gene,
reduction = paste0("RNA_pca_", ndims, "_tsne")) +
ggplot2::labs(title = one_gene) +
ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
ggplot2::theme(aspect.ratio = 1,
plot.subtitle = element_text(hjust = 0.5)) +
Seurat::NoAxes()
return(p)
})
patchwork::wrap_plots(plot_list, ncol = 4)
We save the annotated and filtered Seurat object :
saveRDS(sobj, file = paste0(out_dir, "/datasets/", sample_name, "_sobj_filtered.rds"))
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/local/lib/R/lib/libRblas.so
## LAPACK: /usr/local/lib/R/lib/libRlapack.so
##
## locale:
## [1] C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggplot2_3.3.5 patchwork_1.1.2 dplyr_1.0.7
##
## loaded via a namespace (and not attached):
## [1] softImpute_1.4 graphlayouts_0.7.0
## [3] pbapply_1.4-2 lattice_0.20-41
## [5] haven_2.3.1 vctrs_0.3.8
## [7] usethis_2.0.1 dynwrap_1.2.1
## [9] blob_1.2.1 survival_3.2-13
## [11] prodlim_2019.11.13 dynutils_1.0.5
## [13] later_1.3.0 DBI_1.1.1
## [15] R.utils_2.11.0 SingleCellExperiment_1.8.0
## [17] rappdirs_0.3.3 uwot_0.1.8
## [19] dqrng_0.2.1 jpeg_0.1-8.1
## [21] zlibbioc_1.32.0 pspline_1.0-18
## [23] pcaMethods_1.78.0 mvtnorm_1.1-1
## [25] htmlwidgets_1.5.4 GlobalOptions_0.1.2
## [27] future_1.22.1 UpSetR_1.4.0
## [29] laeken_0.5.2 leiden_0.3.3
## [31] clustree_0.4.3 parallel_3.6.3
## [33] scater_1.14.6 irlba_2.3.3
## [35] DEoptimR_1.0-9 tidygraph_1.1.2
## [37] Rcpp_1.0.9 readr_2.0.2
## [39] KernSmooth_2.23-17 carrier_0.1.0
## [41] promises_1.1.0 gdata_2.18.0
## [43] DelayedArray_0.12.3 limma_3.42.2
## [45] graph_1.64.0 RcppParallel_5.1.4
## [47] Hmisc_4.4-0 fs_1.5.2
## [49] RSpectra_0.16-0 fastmatch_1.1-0
## [51] ranger_0.12.1 digest_0.6.25
## [53] png_0.1-7 sctransform_0.2.1
## [55] cowplot_1.0.0 DOSE_3.12.0
## [57] ggvenn_0.1.9 here_1.0.1
## [59] TInGa_0.0.0.9000 ggraph_2.0.3
## [61] pkgconfig_2.0.3 GO.db_3.10.0
## [63] DelayedMatrixStats_1.8.0 gower_0.2.1
## [65] ggbeeswarm_0.6.0 iterators_1.0.12
## [67] DropletUtils_1.6.1 reticulate_1.26
## [69] clusterProfiler_3.14.3 SummarizedExperiment_1.16.1
## [71] circlize_0.4.15 beeswarm_0.4.0
## [73] GetoptLong_1.0.5 xfun_0.35
## [75] bslib_0.3.1 zoo_1.8-10
## [77] tidyselect_1.1.0 reshape2_1.4.4
## [79] purrr_0.3.4 ica_1.0-2
## [81] pcaPP_1.9-73 viridisLite_0.3.0
## [83] rtracklayer_1.46.0 rlang_1.0.2
## [85] hexbin_1.28.1 jquerylib_0.1.4
## [87] dyneval_0.9.9 glue_1.4.2
## [89] RColorBrewer_1.1-2 matrixStats_0.56.0
## [91] stringr_1.4.0 lava_1.6.7
## [93] europepmc_0.3 DESeq2_1.26.0
## [95] recipes_0.1.17 labeling_0.3
## [97] httpuv_1.5.2 class_7.3-17
## [99] BiocNeighbors_1.4.2 DO.db_2.9
## [101] annotate_1.64.0 jsonlite_1.7.2
## [103] XVector_0.26.0 bit_4.0.4
## [105] mime_0.9 aquarius_0.1.5
## [107] Rsamtools_2.2.3 gridExtra_2.3
## [109] gplots_3.0.3 stringi_1.4.6
## [111] processx_3.5.2 gsl_2.1-6
## [113] bitops_1.0-6 cli_3.0.1
## [115] batchelor_1.2.4 RSQLite_2.2.0
## [117] randomForest_4.6-14 tidyr_1.1.4
## [119] data.table_1.14.2 rstudioapi_0.13
## [121] org.Mm.eg.db_3.10.0 GenomicAlignments_1.22.1
## [123] nlme_3.1-147 qvalue_2.18.0
## [125] scran_1.14.6 locfit_1.5-9.4
## [127] scDblFinder_1.1.8 listenv_0.8.0
## [129] ggthemes_4.2.4 gridGraphics_0.5-0
## [131] R.oo_1.24.0 dbplyr_1.4.4
## [133] BiocGenerics_0.32.0 TTR_0.24.2
## [135] readxl_1.3.1 lifecycle_1.0.1
## [137] timeDate_3043.102 ggpattern_0.3.1
## [139] munsell_0.5.0 cellranger_1.1.0
## [141] R.methodsS3_1.8.1 proxyC_0.1.5
## [143] visNetwork_2.0.9 caTools_1.18.0
## [145] codetools_0.2-16 Biobase_2.46.0
## [147] GenomeInfoDb_1.22.1 vipor_0.4.5
## [149] lmtest_0.9-38 msigdbr_7.5.1
## [151] htmlTable_1.13.3 triebeard_0.3.0
## [153] lsei_1.2-0 xtable_1.8-4
## [155] ROCR_1.0-7 BiocManager_1.30.10
## [157] scatterplot3d_0.3-41 abind_1.4-5
## [159] farver_2.0.3 parallelly_1.28.1
## [161] RANN_2.6.1 askpass_1.1
## [163] GenomicRanges_1.38.0 RcppAnnoy_0.0.16
## [165] tibble_3.1.5 ggdendro_0.1-20
## [167] cluster_2.1.0 future.apply_1.5.0
## [169] Seurat_3.1.5 dendextend_1.15.1
## [171] Matrix_1.3-2 ellipsis_0.3.2
## [173] prettyunits_1.1.1 lubridate_1.7.9
## [175] ggridges_0.5.2 igraph_1.2.5
## [177] RcppEigen_0.3.3.7.0 fgsea_1.12.0
## [179] remotes_2.4.2 scBFA_1.0.0
## [181] destiny_3.0.1 VIM_6.1.1
## [183] testthat_3.1.0 htmltools_0.5.2
## [185] BiocFileCache_1.10.2 yaml_2.2.1
## [187] utf8_1.1.4 plotly_4.9.2.1
## [189] XML_3.99-0.3 ModelMetrics_1.2.2.2
## [191] e1071_1.7-3 foreign_0.8-76
## [193] withr_2.5.0 fitdistrplus_1.0-14
## [195] BiocParallel_1.20.1 xgboost_1.4.1.1
## [197] bit64_4.0.5 foreach_1.5.0
## [199] robustbase_0.93-9 Biostrings_2.54.0
## [201] GOSemSim_2.13.1 rsvd_1.0.3
## [203] memoise_2.0.0 evaluate_0.18
## [205] forcats_0.5.0 rio_0.5.16
## [207] geneplotter_1.64.0 tzdb_0.1.2
## [209] caret_6.0-86 ps_1.6.0
## [211] DiagrammeR_1.0.6.1 curl_4.3
## [213] fdrtool_1.2.15 fansi_0.4.1
## [215] highr_0.8 urltools_1.7.3
## [217] xts_0.12.1 GSEABase_1.48.0
## [219] acepack_1.4.1 edgeR_3.28.1
## [221] checkmate_2.0.0 scds_1.2.0
## [223] cachem_1.0.6 npsurv_0.4-0
## [225] babelgene_22.3 rjson_0.2.20
## [227] openxlsx_4.1.5 ggrepel_0.9.1
## [229] clue_0.3-60 rprojroot_2.0.2
## [231] stabledist_0.7-1 tools_3.6.3
## [233] sass_0.4.0 nichenetr_1.1.1
## [235] magrittr_2.0.1 RCurl_1.98-1.2
## [237] proxy_0.4-24 car_3.0-11
## [239] ape_5.3 ggplotify_0.0.5
## [241] xml2_1.3.2 httr_1.4.2
## [243] assertthat_0.2.1 rmarkdown_2.18
## [245] boot_1.3-25 globals_0.14.0
## [247] R6_2.4.1 Rhdf5lib_1.8.0
## [249] nnet_7.3-14 RcppHNSW_0.2.0
## [251] progress_1.2.2 genefilter_1.68.0
## [253] statmod_1.4.34 gtools_3.8.2
## [255] shape_1.4.6 HDF5Array_1.14.4
## [257] BiocSingular_1.2.2 rhdf5_2.30.1
## [259] splines_3.6.3 AUCell_1.8.0
## [261] carData_3.0-4 colorspace_1.4-1
## [263] generics_0.1.0 stats4_3.6.3
## [265] base64enc_0.1-3 dynfeature_1.0.0
## [267] smoother_1.1 gridtext_0.1.1
## [269] pillar_1.6.3 tweenr_1.0.1
## [271] sp_1.4-1 ggplot.multistats_1.0.0
## [273] rvcheck_0.1.8 GenomeInfoDbData_1.2.2
## [275] plyr_1.8.6 gtable_0.3.0
## [277] zip_2.2.0 knitr_1.41
## [279] ComplexHeatmap_2.14.0 latticeExtra_0.6-29
## [281] biomaRt_2.42.1 IRanges_2.20.2
## [283] fastmap_1.1.0 ADGofTest_0.3
## [285] copula_1.0-0 doParallel_1.0.15
## [287] AnnotationDbi_1.48.0 vcd_1.4-8
## [289] babelwhale_1.0.1 openssl_1.4.1
## [291] scales_1.1.1 backports_1.2.1
## [293] S4Vectors_0.24.4 ipred_0.9-12
## [295] enrichplot_1.6.1 hms_1.1.1
## [297] ggforce_0.3.1 Rtsne_0.15
## [299] shiny_1.7.1 numDeriv_2016.8-1.1
## [301] polyclip_1.10-0 grid_3.6.3
## [303] lazyeval_0.2.2 Formula_1.2-3
## [305] tsne_0.1-3 crayon_1.3.4
## [307] MASS_7.3-54 pROC_1.16.2
## [309] viridis_0.5.1 dynparam_1.0.0
## [311] rpart_4.1-15 zinbwave_1.8.0
## [313] compiler_3.6.3 ggtext_0.1.0